################################################################################
## This R script reproduces the results from the paper "Unpacking Bribery:    ##
## Petty Corruption and Favor Exchanges"                                      ##
##                                                                            ##
## R Version: 4.3.1                                                           ##
## Operating system: macOS Sonoma 14.3                                        ##
##                                                                            ##
## Author: Diego Romero                                                       ##
## Institution: Utah State University                                         ##
################################################################################

rm(list = ls())
set.seed(1992)
options(scipen = 100)

library(tidyverse) # Version: 2.0.0
library(ggplot2) # Version: 3.4.2
library(plotrix) # Version: 3.8-4

# Loading Data
df <- read.csv("unpackingbribery_data.csv")

ls(df)


#########################################
## Figures and Tables in the Main Text ## 
#########################################

# Summary Statistics
stargazer(df)

# Table 1: Connections with Public Officials

# Municipal Worker, Any Connection
mean(df$rela_muni_yn,na.rm=TRUE)

# Municipal Worker, Acquaintance, Friend, Relative
prop.table(table(df$rela_muni[df$rela_muni_yn==1]))

# Municipal Police Agent, Any Connection
mean(df$rela_munipolice_yn,na.rm=TRUE)

# Municipal Police Agent, Acquaintance, Friend, Relative
prop.table(table(df$rela_munipolice[df$rela_munipolice_yn==1]))

# National Police Agent, Any Connection
mean(df$rela_police_yn,na.rm=TRUE)

# National Police Agent, Acquaintance, Friend, Relative
prop.table(table(df$rela_police[df$rela_police_yn==1]))


# Table 2: Estimates of Favor Exchanges and Extortion

# Column 1: Implicit Favors

## Full Sample:
### Direct Question 1:
mean(df$ffavor_direct,na.rm=TRUE)
std.error(df$ffavor_direct,na.rm=TRUE)

### List Experiment, Difference in Means:
summary(lm(listexp_favors_count ~ listexp_favors_trt,data=df))

## People who interacted with public officials
### Direct Question 1:
mean(df[which(df$interact==1),]$ffavor_direct,na.rm=TRUE)
std.error(df[which(df$interact==1),]$ffavor_direct,na.rm=TRUE)

### Direct Question 2:
mean(df$ffavor_mp,na.rm=TRUE)
std.error(df$ffavor_mp,na.rm=TRUE)


# Column 2: Bribery

## Full Sample:
### Direct Question 1:
mean(df$pfavor_direct,na.rm=TRUE)
std.error(df$pfavor_direct,na.rm=TRUE)

### List Experiment, Difference in Means:
summary(lm(listexp_bribes_count ~ listexp_bribes_trt,data=df))

## People who interacted with public officials
### Direct Question 1:
mean(df[which(df$interact==1),]$pfavor_direct,na.rm=TRUE)
std.error(df[which(df$interact==1),]$pfavor_direct,na.rm=TRUE)

### Direct Question 2:
mean(df$pfavor_mp,na.rm=TRUE)
std.error(df$pfavor_mp,na.rm=TRUE)


# Column 3: Extortion

## Full Sample:
### Direct Question 1:
mean(df$extort_direct,na.rm=TRUE)
std.error(df$extort_direct,na.rm=TRUE)

### List Experiment, Difference in Means:
summary(lm(listexp_extort_count ~ listexp_extort_trt,data=df))

## People who interacted with public officials
### Direct Question 1:
mean(df[which(df$interact==1),]$extort_direct,na.rm=TRUE)
std.error(df[which(df$interact==1),]$extort_direct,na.rm=TRUE)

### Direct Question 2:
mean(df$extort_mp,na.rm=TRUE)
std.error(df$extort_mp,na.rm=TRUE)


# Figure 1: Who Engages in Favor Exchanges
# (a) Proximity
dfs <- df[which(is.na(df$connected_all)==FALSE),]
dfs <- dfs %>%
  mutate(proximity_bin = case_when(connected_all > 0.411 ~ "Yes",
                                   connected_all <= 0.411 ~ "No"))
# (Left) Implicit Favor Exchanges
ggplot(data = dfs, aes(x=as.factor(proximity_bin), y=ffavor_direct)) +
  stat_summary(fun="mean", geom="bar",fill="cornflowerblue") + 
  stat_summary(fun.data = mean_se, geom = "errorbar") +
  xlab("Respondent is socially proximate to public officials") + ylab("% Implicit Favor Exchanges") +
  theme_bw()

# (Right) Bribery
ggplot(data = dfs, aes(x=as.factor(proximity_bin), y=pfavor_direct)) +
  stat_summary(fun="mean", geom="bar",fill="darkorange") + 
  stat_summary(fun.data = mean_se, geom = "errorbar") +
  xlab("Respondent is socially proximate to public officials") + ylab("% Bribery") +
  theme_bw()

# (b) Centrality
dfs <- df[which(is.na(df$leadership_often)==FALSE),]
dfs <- dfs %>%
  mutate(centrality_bin = case_when(leadership_often > 1.773 ~ "Yes",
                                    leadership_often <= 1.773 ~ "No"))
# (Left) Implicit Favor Exchanges
ggplot(data = dfs, aes(x=as.factor(centrality_bin), y=ffavor_direct)) +
  stat_summary(fun="mean", geom="bar",fill="cornflowerblue") + 
  stat_summary(fun.data = mean_se, geom = "errorbar") +
  xlab("Respondent is socially central") + ylab("% Implicit Favor Exchanges") +
  theme_bw()

# (Right) Bribery
ggplot(data = dfs, aes(x=as.factor(centrality_bin), y=pfavor_direct)) +
  stat_summary(fun="mean", geom="bar",fill="darkorange") + 
  stat_summary(fun.data = mean_se, geom = "errorbar") +
  xlab("Respondent is socially central") + ylab("% Bribery") +
  theme_bw()


# Table 3: Proximity, Centrality and Interactions with Public Officials


# MAIN PAPER: Interactions with public officials, Logit Models
m1_logit_naive <- glm(interact ~ connected_all + leadership_often, data = df, family = "binomial")
m1_logit_naive$vcov <- vcovCL(m1_logit_naive, cluster = ~ municipio, type = "HC0")
m1_logit_naive_coef = coeftest(m1_logit_naive, m1_logit_naive$vcov)
m1_logit_naive_cse <- sqrt(diag(m1_logit_naive$vcov))

m1_logit_controls <- glm(interact ~ connected_all + leadership_often + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status, data = df, family = "binomial")
m1_logit_controls$vcov <- vcovCL(m1_logit_controls, cluster = ~ municipio, type = "HC0")
m1_logit_controls_coef = coeftest(m1_logit_controls, m1_logit_controls$vcov)
m1_logit_controls_cse <- sqrt(diag(m1_logit_controls$vcov))

m1_logit_fe <- glm(interact ~ connected_all + leadership_often + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status + factor(municipio), data = df, family = "binomial")
m1_logit_fe$vcov <- vcovCL(m1_logit_fe, cluster = ~ municipio, type = "HC0")
m1_logit_fe_coef = coeftest(m1_logit_fe, m1_logit_fe$vcov)
m1_logit_fe_cse <- sqrt(diag(m1_logit_fe$vcov))

margins(m1_logit_fe)
# connected_all leadership_often  
# 0.1062           0.0533

sd(df$connected_all,na.rm=TRUE)
# 0.4812298

# 0.4812298*0.1062 

summary(df$leadership_often)
sd(df$leadership_often,na.rm=TRUE)
# 0.7491753

# 0.7491753*0.0533

m1_logit_pluscon <- glm(interact ~ connected_all + leadership_often + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status + comm_meeting + volunteer + protest + pol_party + factor(municipio), data = df, family = "binomial")
m1_logit_pluscon$vcov <- vcovCL(m1_logit_pluscon, cluster = ~ municipio, type = "HC0")
m1_logit_pluscon_coef = coeftest(m1_logit_pluscon, m1_logit_pluscon$vcov)
m1_logit_pluscon_cse <- sqrt(diag(m1_logit_pluscon$vcov))

# TABLE
stargazer(m1_logit_naive, m1_logit_controls, m1_logit_fe,m1_logit_pluscon,
          se = list(m1_logit_naive_cse, m1_logit_controls_cse, m1_logit_fe_cse,m1_logit_pluscon_cse), 
          title="Proximity, Centrality and Interactions with Public Officials",
          align=TRUE, dep.var.labels=c("Interacted with Public Officials"), 
          covariate.labels = c("Proximity", "Centrality","Asset Count","Enough Income","Spanish","Male","Age","Household Size", "Distance", "Secondary Education", "Employment","Attend Meetings", "Volunteer","Protest","Affiliate"), 
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001), 
          star.char = c("+", "*", "**","***"),
          no.space=TRUE, add.lines = list(c("Municipality FE", " "," ","Y","Y")),
          notes = "Significance levels: + p<0.10, * p<0.05, ** p<0.01, *** p<0.001", 
          notes.append = FALSE)



## Table 4: Proximity, Centrality and Favors Exchanges I
##          Using Direct Questions 1

# implicit favor exchanges
m1_logit_naive <- glm(ffavor_direct ~ connected_all + leadership_often, data = df, family = "binomial")
m1_logit_naive$vcov <- vcovCL(m1_logit_naive, cluster = ~ municipio, type = "HC0")
m1_logit_naive_coef = coeftest(m1_logit_naive, m1_logit_naive$vcov)
m1_logit_naive_cse <- sqrt(diag(m1_logit_naive$vcov))

m1_logit_controls <- glm(ffavor_direct ~ connected_all + leadership_often + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status , data = df, family = "binomial")
m1_logit_controls$vcov <- vcovCL(m1_logit_controls, cluster = ~ municipio, type = "HC0")
m1_logit_controls_coef = coeftest(m1_logit_controls, m1_logit_controls$vcov)
m1_logit_controls_cse <- sqrt(diag(m1_logit_controls$vcov))

m1_logit_fe <- glm(ffavor_direct ~ connected_all + leadership_often + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status + factor(municipio), data = df, family = "binomial")
m1_logit_fe$vcov <- vcovCL(m1_logit_fe, cluster = ~ municipio, type = "HC0")
m1_logit_fe_coef = coeftest(m1_logit_fe, m1_logit_fe$vcov)
m1_logit_fe_cse <- sqrt(diag(m1_logit_fe$vcov))

margins(m1_logit_fe)
#plot(margins(m1_logit_fe))
# connected_all leadership_often  
#    0.04915        0.03791

summary(df$connected_all)
sd(df$connected_all,na.rm=TRUE)
# 0.4812298

# (0.4812298*0.04915)*100

summary(df$leadership_often)
sd(df$leadership_often,na.rm=TRUE)
# 0.7491753

# (0.7491753*0.03791)*100



m1_logit_inter <- glm(ffavor_direct ~ connected_all + leadership_often + interact + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status + factor(municipio), data = df, family = "binomial")
m1_logit_inter$vcov <- vcovCL(m1_logit_inter, cluster = ~ municipio, type = "HC0")
m1_logit_inter_coef = coeftest(m1_logit_inter, m1_logit_inter$vcov)
m1_logit_inter_cse <- sqrt(diag(m1_logit_inter$vcov))

m1_logit_fecont <- glm(ffavor_direct ~ connected_all + leadership_often + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status + comm_meeting + volunteer + protest + pol_party + factor(municipio), data = df, family = "binomial")
m1_logit_fecont$vcov <- vcovCL(m1_logit_fecont, cluster = ~ municipio, type = "HC0")
m1_logit_fecont_coef = coeftest(m1_logit_fecont, m1_logit_fecont$vcov)
m1_logit_fecont_cse <- sqrt(diag(m1_logit_fecont$vcov))

# Bribes
m2_logit_naive <- glm(pfavor_direct ~ connected_all + leadership_often, data = df, family = "binomial")
m2_logit_naive$vcov <- vcovCL(m2_logit_naive, cluster = ~ municipio, type = "HC0")
m2_logit_naive_coef = coeftest(m2_logit_naive, m2_logit_naive$vcov)
m2_logit_naive_cse <- sqrt(diag(m2_logit_naive$vcov))

m2_logit_controls <- glm(pfavor_direct ~ connected_all + leadership_often + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status , data = df, family = "binomial")
m2_logit_controls$vcov <- vcovCL(m2_logit_controls, cluster = ~ municipio, type = "HC0")
m2_logit_controls_coef = coeftest(m2_logit_controls, m2_logit_controls$vcov)
m2_logit_controls_cse <- sqrt(diag(m2_logit_controls$vcov))

m2_logit_fe <- glm(pfavor_direct ~ connected_all + leadership_often + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status + factor(municipio), data = df, family = "binomial")
m2_logit_fe$vcov <- vcovCL(m2_logit_fe, cluster = ~ municipio, type = "HC0")
m2_logit_fe_coef = coeftest(m2_logit_fe, m2_logit_fe$vcov)
m2_logit_fe_cse <- sqrt(diag(m2_logit_fe$vcov))


margins(m2_logit_fe)
#plot(margins(m1_logit_fe))
# connected_all leadership_often  
#    0.03135          0.01109

summary(df$connected_all)
sd(df$connected_all,na.rm=TRUE)
# 0.4812298

# (0.4812298*0.03135)*100

summary(df$leadership_often)
sd(df$leadership_often,na.rm=TRUE)
# 0.7491753

# (0.7491753*0.01109)*100

m2_logit_inter <- glm(pfavor_direct ~ connected_all + leadership_often + interact + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status + factor(municipio), data = df, family = "binomial")
m2_logit_inter$vcov <- vcovCL(m2_logit_inter, cluster = ~ municipio, type = "HC0")
m2_logit_inter_coef = coeftest(m2_logit_inter, m2_logit_inter$vcov)
m2_logit_inter_cse <- sqrt(diag(m2_logit_inter$vcov))

m2_logit_fecont <- glm(pfavor_direct ~ connected_all + leadership_often + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status + comm_meeting + volunteer + protest + pol_party + factor(municipio), data = df, family = "binomial")
m2_logit_fecont$vcov <- vcovCL(m2_logit_fecont, cluster = ~ municipio, type = "HC0")
m2_logit_fecont_coef = coeftest(m2_logit_fecont, m2_logit_fecont$vcov)
m2_logit_fecont_cse <- sqrt(diag(m2_logit_fecont$vcov))

stargazer(m1_logit_naive, m1_logit_controls, m1_logit_fe, m1_logit_inter, m1_logit_fecont, m2_logit_naive, m2_logit_controls, m2_logit_fe, m2_logit_inter, m2_logit_fecont,
          se = list(m1_logit_naive_cse, m1_logit_controls_cse, m1_logit_fe_cse, m1_logit_inter_cse, m1_logit_fecont_cse, m2_logit_naive_cse, m2_logit_controls_cse, m2_logit_fe_cse, m2_logit_inter_cse, m2_logit_fecont_cse), 
          title="Proximity, Centrality and Favor Exchanges I",
          align=TRUE, dep.var.labels=c("Implicit Favors (Direct 1)","Bribes (Direct 1)"),
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001), 
          star.char = c("+", "*", "**","***"),
          no.space=TRUE, 
          add.lines = list(
            c("Demographics", " ","Y","Y","Y","Y", " ","Y","Y","Y","Y"),
            c("Civic Engagement"," "," "," "," ","Y", " ",""," "," ","Y"),
            c("Municipality FE", " "," ","Y","Y","Y", " "," ","Y","Y","Y")
          ),  
          keep = c("connected_all","leadership_often","interact","Constant"),
          notes = "Significance levels: + p<0.10, * p<0.05, ** p<0.01, *** p<0.001", 
          notes.append = FALSE)


# Table 5: Proximity, Centrality and Favor Exchanges II
# implicit favor exchanges
m3_logit_naive <- glm(ffavor_mp ~ connected_all + leadership_often, data = df, family = "binomial")
m3_logit_naive$vcov <- vcovCL(m3_logit_naive, cluster = ~ municipio, type = "HC0")
m3_logit_naive_coef = coeftest(m3_logit_naive, m3_logit_naive$vcov)
m3_logit_naive_cse <- sqrt(diag(m3_logit_naive$vcov))

m3_logit_controls <- glm(ffavor_mp ~ connected_all + leadership_often + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status , data = df, family = "binomial")
m3_logit_controls$vcov <- vcovCL(m3_logit_controls, cluster = ~ municipio, type = "HC0")
m3_logit_controls_coef = coeftest(m3_logit_controls, m3_logit_controls$vcov)
m3_logit_controls_cse <- sqrt(diag(m3_logit_controls$vcov))

m3_logit_fe <- glm(ffavor_mp ~ connected_all + leadership_often + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status + factor(municipio), data = df, family = "binomial")
m3_logit_fe$vcov <- vcovCL(m3_logit_fe, cluster = ~ municipio, type = "HC0")
m3_logit_fe_coef = coeftest(m3_logit_fe, m3_logit_fe$vcov)
m3_logit_fe_cse <- sqrt(diag(m3_logit_fe$vcov))

margins(m3_logit_fe)
#plot(margins(m1_logit_fe))
# connected_all leadership_often  
#    0.05568          0.02887

summary(df$connected_all)
sd(df$connected_all,na.rm=TRUE)
# 0.4812298

# (0.4812298*0.05568)*100

summary(df$leadership_often)
sd(df$leadership_often,na.rm=TRUE)
# 0.7491753

# (0.7491753*0.02887)*100

m3_logit_fecont <- glm(ffavor_mp ~ connected_all + leadership_often + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status + comm_meeting + volunteer + protest + pol_party + factor(municipio), data = df, family = "binomial")
m3_logit_fecont$vcov <- vcovCL(m3_logit_fecont, cluster = ~ municipio, type = "HC0")
m3_logit_fecont_coef = coeftest(m3_logit_fecont, m3_logit_fecont$vcov)
m3_logit_fecont_cse <- sqrt(diag(m3_logit_fecont$vcov))

# Bribes
m4_logit_naive <- glm(pfavor_mp ~ connected_all + leadership_often, data = df, family = "binomial")
m4_logit_naive$vcov <- vcovCL(m4_logit_naive, cluster = ~ municipio, type = "HC0")
m4_logit_naive_coef = coeftest(m4_logit_naive, m4_logit_naive$vcov)
m4_logit_naive_cse <- sqrt(diag(m4_logit_naive$vcov))

m4_logit_controls <- glm(pfavor_mp ~ connected_all + leadership_often + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status , data = df, family = "binomial")
m4_logit_controls$vcov <- vcovCL(m4_logit_controls, cluster = ~ municipio, type = "HC0")
m4_logit_controls_coef = coeftest(m4_logit_controls, m4_logit_controls$vcov)
m4_logit_controls_cse <- sqrt(diag(m4_logit_controls$vcov))

m4_logit_fe <- glm(pfavor_mp ~ connected_all + leadership_often + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status + factor(municipio), data = df, family = "binomial")
m4_logit_fe$vcov <- vcovCL(m4_logit_fe, cluster = ~ municipio, type = "HC0")
m4_logit_fe_coef = coeftest(m4_logit_fe, m4_logit_fe$vcov)
m4_logit_fe_cse <- sqrt(diag(m4_logit_fe$vcov))

margins(m4_logit_fe)
#plot(margins(m1_logit_fe))
# connected_all leadership_often  
#   0.0148         0.002337 

summary(df$connected_all)
sd(df$connected_all,na.rm=TRUE)
# 0.4812298

# (0.4812298*0.0148)*100

summary(df$leadership_often)
sd(df$leadership_often,na.rm=TRUE)
# 0.7491753

# (0.7491753*0.002337)*100

m4_logit_fecont <- glm(pfavor_mp ~ connected_all + leadership_often + asset_count + b_expenses + language_spanish + gender + age + hh_size + km_to_cabecera + as.factor(education_bin) + work_status + comm_meeting + volunteer + protest + pol_party + factor(municipio), data = df, family = "binomial")
m4_logit_fecont$vcov <- vcovCL(m4_logit_fecont, cluster = ~ municipio, type = "HC0")
m4_logit_fecont_coef = coeftest(m4_logit_fecont, m4_logit_fecont$vcov)
m4_logit_fecont_cse <- sqrt(diag(m4_logit_fecont$vcov))

stargazer(m3_logit_naive, m3_logit_controls, m3_logit_fe, m3_logit_fecont, m4_logit_naive, m4_logit_controls, m4_logit_fe, m4_logit_fecont,
          se = list(m3_logit_naive_cse, m3_logit_controls_cse, m3_logit_fe_cse, m3_logit_fecont_cse, m4_logit_naive_cse, m4_logit_controls_cse, m4_logit_fe_cse, m4_logit_fecont_cse), 
          title="Proximity, Centrality and Favor Exchanges II",
          align=TRUE, dep.var.labels=c("Implicit Favors (Direct 2)","Bribes (Direct 2)"),
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001), 
          star.char = c("+", "*", "**","***"),
          no.space=TRUE, 
          add.lines = list(
            c("Demographics", " ","Y","Y","Y", " ","Y","Y","Y"),
            c("Civic Engagement"," "," "," ","Y", " ",""," ","Y"),
            c("Municipality FE", " "," ","Y","Y", " "," ","Y","Y")
          ),  
          keep = c("connected_all","leadership_often","Constant"),
          notes = "Significance levels: + p<0.10, * p<0.05, ** p<0.01, *** p<0.001", 
          notes.append = FALSE)





